In this notebook, I plot the ACC from different climate models and compare them to the World Ocean Atlas (WOA). The packages used here are part of the Pangeo project.
from dask.distributed import Client, progress
from dask_kubernetes import KubeCluster
cluster = KubeCluster(n_workers=30)
cluster
VBox(children=(HTML(value='<h2>KubeCluster</h2>'), HBox(children=(HTML(value='\n<div>\n <style scoped>\n .…
client = Client(cluster)
client
Client
|
Cluster
|
from matplotlib import pyplot as plt
from matplotlib import cm
import numpy as np
import pandas as pd
import xarray as xr
import gcsfs
import gsw
from tqdm import tqdm_notebook as tqdm
import cartopy.crs as ccrs
import cartopy
import xesmf as xe
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import urllib.request
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.ticker as mticker
import matplotlib as mpl
import cmocean
from xhistogram.xarray import histogram
%matplotlib inline
#plt.rcParams['figure.figsize'] = 20, 10
%config InlineBackend.figure_format = 'retina'
# Hand-built 17-step blue -> white -> orange diverging palette (RGBA rows,
# alpha always 1) used as the salinity colormap for the maps below.
colors = [[0.007843, 0.452157, 0.603333, 1],
[0.007843, 0.452157, 0.653333, 1],
[0.007843, 0.592157, 0.733333, 1],
[0.254902, 0.694118, 0.8, 1],
[0.401961, 0.796078, 0.866667, 1],
[0.5, 0.85, 0.9, 1],
[0.70902, 0.898039, 0.933333, 1],
[0.75, 0.92, 0.933333, 1],
[0.8, 0.958039, 0.933333, 1],
[1, 0.898039, 0.788235, 1],
[1, 0.85, 0.68, 1],
[1, 0.8, 0.580392, 1],
[0.996078, 0.701961, 0.372549, 1],
[0.996078, 0.6, 0.160784, 1],
[0.9, 0.5, 0.160784, 1],
# NOTE(review): 0.9000004 looks like a typo for 0.9 — confirm before changing.
[0.9000004, 0.4, 0.160784, 1],
# NOTE(review): blue channel 0.160874 differs from the 0.160784 used in the
# rows above — possible transposed digits; confirm intent.
[0.9, 0.3, 0.160874, 1]]
# Discrete colormap built from the rows above (name presumably after its author).
Becki = ListedColormap(colors)
def Catalog_filter(catalog, var, ense, exp, freq):
    '''
    Filter a CMIP6 catalog dataframe down to the requested runs.

    Function that takes LISTS of:
    var  = desired variable names
    ense = desired ensemble members
    exp  = desired experiments
    freq = desired sample rates (frequency / table_id)
    and returns the unique model (source_id) names plus the filtered catalog.
    '''
    # A row survives only if it matches every filter; the brackets make the
    # old explicit line-continuation backslashes unnecessary.
    df_filter = catalog[catalog['experiment_id'].isin(exp)
                        & catalog['table_id'].isin(freq)
                        & catalog['member_id'].isin(ense)
                        & catalog['variable_id'].isin(var)]
    models = df_filter.source_id.unique()
    return models, df_filter
def get_data(df, variable, model, t_min, t_max):
    '''
    Download variable data for one model from Google Cloud zarr stores and
    return a merged xarray dataset restricted to the time slice
    [t_min, t_max].

    IMPORTANT - variables in the list should have the same dimensions!!!
    '''
    # One anonymous filesystem handle is enough for all variables; the
    # original recreated it on every loop iteration.
    gcs = gcsfs.GCSFileSystem(token='anon')
    ds_l = []
    for var in variable:
        # First catalog row matching this (variable, model) pair.
        uri = df[(df.variable_id == var) &
                 (df.source_id == model)].zstore.values[0]
        ds_l.append(xr.open_zarr(gcs.get_mapper(uri), consolidated=True))
    ds_full = xr.merge(ds_l, compat='override')\
                .sel(time=slice(t_min, t_max))
    return ds_full
df = pd.read_csv('https://storage.googleapis.com/pangeo-cmip6/pangeo-cmip6-zarr-consolidated-stores.csv')
df
variable_ids = ['so', 'thetao']
experiment_ids = ['historical']
table_ids = ['Omon']
member_ids = ['r1i1p1f1']
source_list, df2 = Catalog_filter(df, variable_ids,
member_ids, experiment_ids,
table_ids)
for (num,item) in enumerate(source_list):
print(num,item)
0 AWI-CM-1-1-MR 1 BCC-CSM2-MR 2 BCC-ESM1 3 CAMS-CSM1-0 4 FGOALS-f3-L 5 E3SM-1-0 6 EC-Earth3-Veg 7 GISS-E2-1-G-CC 8 CESM2-WACCM 9 CESM2 10 GFDL-CM4 11 GFDL-ESM4 12 NESM3 13 SAM0-UNICON 14 MCM-UA-1-0
ds_set = {}
for key in tqdm(source_list):
ds_set[key] = get_data(df2, variable_ids, key, '1986', '2005')
HBox(children=(IntProgress(value=0, max=15), HTML(value='')))
ds_ob_S = xr.open_mfdataset('WOA-18/Salinity/*.nc',
decode_times=False, combine='nested', concat_dim='time')
def Regridding(data, grid_out, lat, lon):
    """Bilinearly regrid *data* onto the lat/lon grid of *grid_out*.

    *lat* and *lon* name the horizontal dimensions of *data*; weights are
    cached on disk and reused between calls.
    """
    target_grid = xr.Dataset({lat: (grid_out.lat), lon: (grid_out.lon)})
    regridder = xe.Regridder(data, target_grid, 'bilinear',
                             periodic=True, reuse_weights=True)
    return regridder(data)
def plot_avg(data, **kwargs):
    """Plot *data* on a south-polar orthographic map with the Becki colormap."""
    ax = plt.axes(projection=ccrs.Orthographic(central_latitude=-90.0))
    # zorder=3 keeps the coastlines above every other artist.
    ax.coastlines(resolution='10m', zorder=3)
    gridliner = ax.gridlines(color='black', alpha=0.7)
    gridliner.ylocator = mticker.FixedLocator([-65, -45, 0])
    ax.add_feature(cartopy.feature.LAND, zorder=1,
                   facecolor=cartopy.feature.COLORS['land_alt1'])
    plot_kwargs = dict(transform=ccrs.PlateCarree(),
                       vmin=34.4, vmax=35.81, levels=17,
                       cmap=Becki, cbar_kwargs={'shrink': 0.7})
    data.plot(ax=ax, **plot_kwargs)
def plot_diff(data, **kwargs):
    """Plot a difference field on a south-polar orthographic map (balance cmap)."""
    ax = plt.axes(projection=ccrs.Orthographic(central_latitude=-90.0))
    # zorder=3 keeps the coastlines above every other artist.
    ax.coastlines(resolution='10m', zorder=3)
    gridliner = ax.gridlines(color='black', alpha=0.7)
    gridliner.ylocator = mticker.FixedLocator([-65, -45, 0])
    ax.add_feature(cartopy.feature.LAND, zorder=1,
                   facecolor=cartopy.feature.COLORS['land_alt1'])
    plot_kwargs = dict(transform=ccrs.PlateCarree(), vmin=-1, vmax=1,
                       cmap=cmocean.cm.balance, cbar_kwargs={'shrink': 0.7})
    data.plot(ax=ax, **plot_kwargs)
def plot_sub(data, ax, **kwargs):
    """Draw one salinity panel on a caller-supplied axes (no colorbar)."""
    ax.coastlines(resolution='10m', zorder=3)  # coastlines above the data
    gridliner = ax.gridlines(color='black', alpha=0.7)
    gridliner.ylocator = mticker.FixedLocator([-65, -45, 0])
    ax.add_feature(cartopy.feature.LAND, zorder=1,
                   facecolor=cartopy.feature.COLORS['land_alt1'])
    plot_kwargs = dict(transform=ccrs.PlateCarree(),
                       vmin=34.4, vmax=35.81, levels=17,
                       cmap=Becki, add_colorbar=False)
    data.plot(ax=ax, **plot_kwargs)
def plot_subdiff(data, ax, **kwargs):
    """Draw one difference panel on a caller-supplied axes (no colorbar)."""
    # zorder=3 makes sure that no other plots overlay the coastlines.
    ax.coastlines(resolution='10m', zorder=3)
    gridliner = ax.gridlines(color='black', alpha=0.7)
    gridliner.ylocator = mticker.FixedLocator([-65, -45, 0])
    ax.add_feature(cartopy.feature.LAND, zorder=1,
                   facecolor=cartopy.feature.COLORS['land_alt1'])
    plot_kwargs = dict(transform=ccrs.PlateCarree(), vmin=-1, vmax=1,
                       cmap=cmocean.cm.balance, add_colorbar=False)
    data.plot(ax=ax, **plot_kwargs)
#for key in ds_set.keys():
# print(f'{key}: {ds_set[key].coords} \n ')
# Preparing the data for timely average and z (depth) average
# Checking the size after averaging
def name_fix(ds):
    """Normalize dimension names: longitude/latitude -> lon/lat, depth -> lev."""
    renames = {}
    if ('longitude' in ds.dims) and ('latitude' in ds.dims):
        renames['longitude'] = 'lon'
        renames['latitude'] = 'lat'
    if 'depth' in ds.dims:
        renames['depth'] = 'lev'
    return ds.rename(renames) if renames else ds
# Collapse each model to a time-mean, depth-mean field; normalize dimension
# names first so every dataset uses time/lev/lat/lon.
ds_m = {}
for key in ds_set.keys():
    ds_set[key] = name_fix(ds_set[key])
    ds_m[key] = ds_set[key].mean(dim='time').mean(dim='lev')
    # NOTE(review): .size is an element count; /1e6 equals "MB" only for
    # 1-byte elements — the printed label is approximate at best.
    print(f'Size of {key}: {ds_m[key].so.size/1e6} MB \n ')
    #print(f'{key}: {ds_set[key].coords} \n ')
Size of AWI-CM-1-1-MR: 0.830305 MB Size of BCC-CSM2-MR: 0.08352 MB Size of BCC-ESM1: 0.08352 MB Size of CAMS-CSM1-0: 0.072 MB Size of FGOALS-f3-L: 0.07848 MB Size of E3SM-1-0: 0.0648 MB Size of EC-Earth3-Veg: 0.105704 MB Size of GISS-E2-1-G-CC: 0.05184 MB Size of CESM2-WACCM: 0.0648 MB Size of CESM2: 0.12288 MB Size of GFDL-CM4: 1.5552 MB Size of GFDL-ESM4: 0.41472 MB Size of NESM3: 0.105704 MB Size of SAM0-UNICON: 0.12288 MB Size of MCM-UA-1-0: 0.01536 MB
for key in tqdm(ds_m.keys()):
ds_m[key].load()
HBox(children=(IntProgress(value=0, max=15), HTML(value='')))
# Return new dictionary with only coordinates that are not curvilinear
# (Have the same structure as the observational)
# GFDL Y and X correspond to lat and lon respectively
ds_regrid = {}
for key in tqdm(sorted(ds_m)[1:]):
if ('lon' in ds_m[key].dims) and ('lat' in ds_m[key].dims):
ds_regrid[key] = Regridding(ds_m[key], ds_ob_S, 'lat', 'lon')
if ('x' in ds_m[key].dims) and ('y' in ds_m[key].dims):
ds_regrid[key] = Regridding(ds_m[key], ds_ob_S, 'y', 'x')
HBox(children=(IntProgress(value=0, max=14), HTML(value='')))
Reuse existing file: bilinear_232x360_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable so as the horizontal dimensions for this dataset.
Reuse existing file: bilinear_232x360_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable so as the horizontal dimensions for this dataset.
Reuse existing file: bilinear_180x360_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable so as the horizontal dimensions for this dataset.
Reuse existing file: bilinear_180x360_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable so as the horizontal dimensions for this dataset.
Reuse existing file: bilinear_1080x1440_180x360_peri.nc
using dimensions ('y', 'x') from data variable so as the horizontal dimensions for this dataset.
Reuse existing file: bilinear_576x720_180x360_peri.nc
using dimensions ('y', 'x') from data variable so as the horizontal dimensions for this dataset.
Reuse existing file: bilinear_180x288_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable so as the horizontal dimensions for this dataset.
Reuse existing file: bilinear_80x192_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable so as the horizontal dimensions for this dataset.
for key in tqdm(ds_regrid.keys()):
ds_regrid[key].load()
HBox(children=(IntProgress(value=0, max=8), HTML(value='')))
fig = plt.figure(figsize=(20,10))
for index, (key, value) in enumerate(ds_regrid.items()):
ax1 = fig.add_subplot(2,4,index+1, projection=ccrs.Orthographic(central_latitude=-90.0))
plot_sub(ds_regrid[key].so, ax1)
plt.title(f'{key}')
fig, ax = plt.subplots(figsize=(19.7, 1))
fig.subplots_adjust(bottom=0.5)
cmap = mpl.cm.cool
norm = mpl.colors.Normalize(vmin=34.4, vmax=35.81)
cb1 = mpl.colorbar.ColorbarBase(ax, cmap=Becki,
norm=norm,
orientation='horizontal')
cb1.set_label('Salinity')
fig.show()
def url_to_xray(urls_l):
    '''
    Open every OPeNDAP url in *urls_l* (dict of name -> url, ONE VARIABLE)
    and return the datasets concatenated along the time dimension.
    '''
    xrays = []
    # Iterate the url values directly instead of indexing the dict by key.
    for url in tqdm(urls_l.values()):
        # decode_times=False mirrors the original call (the WOA time axis is
        # presumably not CF-decodable — confirm against the files).
        xrays.append(xr.open_dataset(url, decode_times=False))
    return xr.concat(xrays, dim='time')
urls_T = {'WOA_T_85_94': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/temperature/8594/1.00/woa18_8594_t00_01.nc',
'WOA_T_95_04': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/temperature/95A4/1.00/woa18_95A4_t00_01.nc'
}
urls_S = {'WOA_S_85-94': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/salinity/8594/1.00/woa18_8594_s00_01.nc',
'WOA_S_95-04': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/salinity/95A4/1.00/woa18_95A4_s00_01.nc'
}
xr_obs_T = url_to_xray(urls_T)
xr_obs_S = url_to_xray(urls_S)
xr_obs_full = xr.merge([xr_obs_T, xr_obs_S])
xr_obs_full
HBox(children=(IntProgress(value=0, max=2), HTML(value='')))
HBox(children=(IntProgress(value=0, max=2), HTML(value='')))
ds_obs_f = xr_obs_full.mean(dim='time').mean(dim='depth')
plt.figure(figsize=(10,5))
plot_avg(ds_obs_f.s_an)
plt.title(f'WOA-18 Salinity average 1985-2005')
fig = plt.figure(figsize=(20,10))
ds_diff = {}
for index, (key, value) in enumerate(ds_regrid.items()):
ds_diff[key] = ds_regrid[key].so - ds_obs_f.s_an
ax1 = fig.add_subplot(2,4,index+1, projection=ccrs.Orthographic(central_latitude=-90.0))
plot_subdiff(ds_diff[key], ax1)
plt.title(f'{key} minus WOA-18')
fig, ax = plt.subplots(figsize=(19.7, 1))
fig.subplots_adjust(bottom=0.5)
cmap = mpl.cm.cool
norm = mpl.colors.Normalize(vmin=-1, vmax=1)
cb1 = mpl.colorbar.ColorbarBase(ax, cmap=cmocean.cm.balance,
norm=norm,
orientation='horizontal')
cb1.set_label('Salinity Difference')
fig.show()
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/core/nanops.py:140: RuntimeWarning: Mean of empty slice return np.nanmean(a, axis=axis, dtype=dtype)
for key in ds_regrid.keys():
plt.figure(figsize=(10,5))
plot_avg(ds_regrid[key].so)
plt.title(f'{key}')
ds_obs_f = xr_obs_full.mean(dim='time').mean(dim='depth')
plt.figure(figsize=(10,5))
plot_avg(ds_obs_f.s_an)
plt.title(f'WOA-18 Salinity average 1985-2005')
ds_diff = {}
for key in ds_regrid.keys():
ds_diff[key] = ds_regrid[key].so - ds_obs_f.s_an
plt.figure(figsize=(10,5))
plot_diff(ds_diff[key])
plt.title(f'{key} minus WOA-18')
fig = plt.figure(figsize=(20,10))
for index, (key, value) in enumerate(ds_regrid.items()):
ax1 = fig.add_subplot(2,4,index+1, projection=ccrs.Orthographic(central_latitude=-90.0))
fi = plot_sub(ds_regrid[key].so, ax1)
plt.title(f'{key}')
rgr = xe.Regridder(ds_m['BCC-CSM2-MR'], ds_ob_S,
'bilinear', periodic=True, reuse_weights=True)
ds_m['AWI-CM-1-1-MR'].compute()
plots = plt.subplots(projection=ccrs.Orthographic(central_latitude=-90.0))
#ax.set_extent([-180, 180, -90, -60], ccrs.PlateCarree())
# ax.coastlines(resolution='10m',zorder=3) # zorder=3 makes sure that no other plots overlay the coastlines
# ax.gridlines(color='gray')
# ax.add_feature(cartopy.feature.LAND, zorder=1,facecolor=cartopy.feature.COLORS['land_alt1'])
plt.figure()
plot_avg(ds_plt.so)
plt.title(f'{GCM}')
plt.figure()
plot_avg(ds_plt_obs)
plt.title('WOA-18')
ds_plt['diff'] = ds_plt['so'] - ds_plt_obs
plt.figure()
plot_avg(ds_plt.so)
plt.title(f'{GCM}')
plt.savefig('GCM.png')
plt.figure()
plot_avg(ds_plt_obs)
plt.title('WOA-18')
plt.savefig('WOA-18.png')
plt.figure()
plot_diff(ds_plt['diff'])
plt.title('Difference')
plt.savefig('GCM-WOA18.png')
def url_to_xray(urls_l):
    '''
    Function that takes urls for ONE VARIABLE and returns
    a concatenated xarray
    '''
    # Open each remote dataset, then concatenate them along time.
    datasets = [xr.open_dataset(urls_l[name], decode_times=False)
                for name in urls_l]
    return xr.concat(datasets, dim='time')
urls_T = {'WOA_T_85_94': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/temperature/8594/1.00/woa18_8594_t00_01.nc',
'WOA_T_95_04': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/temperature/95A4/1.00/woa18_95A4_t00_01.nc'
}
urls_S = {'WOA_S_85-94': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/salinity/8594/1.00/woa18_8594_s00_01.nc',
'WOA_S_95-04': 'https://data.nodc.noaa.gov/thredds/dodsC/ncei/woa/salinity/95A4/1.00/woa18_95A4_s00_01.nc'
}
#ds_obs_T = url_to_xray(urls_T)
ds_obs_S = url_to_xray(urls_S)
#ds_obs_S.load()
#ds_obs = xr.merge([xr_obs_T, xr_obs_S])
#ds_obs
bs = xr.open_dataset('woa18_8594_s00_01.nc', decode_times=False)
plt.figure(figsize=(10,20))
plot_avg(bs.mean(dim='depth'),'s_an')
xr_obs_S[1]['s_an'].mean(dim='depth')
xr_obs_S[1].mean(dim='depth').s_an
def create_pressure(data, var):
    '''
    Creates new pressure array from depth and latitudes using TEOS-10
    Input: xarray with depth/lat/lon coords and variable *var*
    Output: xarray with pressure as new variable

    Bug fix: the body previously read the global ``xr_obs_full`` (for the
    depth axis and the output coords/dims) instead of the ``data`` argument,
    so the function silently only worked for that one dataset.
    '''
    # Depth is positive-down; gsw.p_from_z expects height (positive up).
    z = -1 * data.depth
    # Pressure depends only on depth and latitude.
    p = np.zeros((len(data.depth), len(data.lat)))
    for i in range(len(data.lat)):
        p[:, i] = gsw.p_from_z(z, data.lat[i])
    # Replicate over longitude -> (depth, lat, lon) ...
    b = np.repeat(p[:, :, np.newaxis], len(data.lon), axis=2)
    # ... then over the leading (time) axis of the target variable.
    # NOTE(review): assumes data[var] is 4-D (time, depth, lat, lon) — confirm.
    P = np.empty_like(data[var])
    for i in range(P.shape[0]):
        P[i, :, :, :] = b
    xray = xr.DataArray(P,
                        coords=data[var].coords,
                        dims=data[var].dims,
                        name='Pressure')
    return xr.merge([data, xray])
par = 't_an'
ds = create_pressure(xr_obs_full, par)
def Regridding(data, grid_out):
    """Bilinearly regrid *data* onto grid_out's lat/lon grid (weights cached)."""
    target = xr.Dataset({'lat': (grid_out.lat), 'lon': (grid_out.lon)})
    regridder = xe.Regridder(data, target, 'bilinear',
                             periodic=True, reuse_weights=True)
    return regridder(data)
def plot_avg(data, var):
    """Plot data[var] on a south-polar orthographic map."""
    ax = plt.axes(projection=ccrs.Orthographic(central_latitude=-90.0))
    ax.coastlines(resolution='10m', zorder=3)  # coastlines above the data
    ax.gridlines(color='gray')
    ax.add_feature(cartopy.feature.LAND, zorder=1,
                   facecolor=cartopy.feature.COLORS['land_alt1'])
    plot_kwargs = dict(transform=ccrs.PlateCarree(),
                       vmin=34.58, vmax=34.83,
                       cbar_kwargs={'shrink': 0.3}, levels=17)
    data[var].plot(ax=ax, **plot_kwargs)
ds_plt = Regridding(ds.sel(time=slice('1986','2005')).mean(dim='time').mean(dim='lev'),
dss)
plt.figure(figsize=(10,20))
plot_avg(ds_plt,'so')
#ds = ds.sel(time=slice('1986','2005')).mean(dim='time').mean(dim='lev')
grid_11 = xr.Dataset({'lat': (dss.lat), 'lon': (dss.lon)})
ds
xray.sel(time=slice('1986','2005')).mean(dim='time').mean(dim='lev')
def Regridding(data, grid_out):
    """Bilinear regridding of *data* onto the lat/lon grid of *grid_out*."""
    destination = xr.Dataset({'lat': (grid_out.lat), 'lon': (grid_out.lon)})
    rg = xe.Regridder(data, destination, 'bilinear',
                      periodic=True, reuse_weights=True)
    return rg(data)
# Regrid every model's 1986-2005 time/depth mean and cache it as netCDF.
for key in tqdm(xray_dic.keys()):
    S = xray_dic[key].sel(time=slice('1986', '2005')).mean(dim='time').mean(dim='lev')
    S_gr = Regridding(S, ds)
    # Bug fix: the original path "\Shit\file_" contained a form-feed escape
    # (\f), producing a corrupt filename; use a forward-slash relative path.
    S_gr.to_netcdf("Shit/file_" + str(key) + ".nc")
    #ds_plt = Regridding(S, ds)
    #ds_plt.to_netcdf('Shit.nc')
fig = plt.figure(figsize=(12,20))
ax = plt.axes(projection=ccrs.Robinson())
ax.coastlines()
ax.gridlines()
ds_plt.plot(ax = ax, transform=ccrs.PlateCarree())
def Regridding(data, grid_out):
    """Regrid *data* bilinearly onto grid_out's 1-degree lat/lon grid."""
    out_grid = xr.Dataset({'lat': (grid_out.lat), 'lon': (grid_out.lon)})
    regrid_op = xe.Regridder(data, out_grid, 'bilinear',
                             periodic=True, reuse_weights=True)
    return regrid_op(data)
ds_plt = Regridding(xray_dic['BCC-CSM2-MR'], ds)
S = ds_plt.sel(time=slice('1986','1990')).mean(dim='time').mean(dim='lev').so
fig = plt.figure(figsize=(12,20))
ax = plt.axes(projection=ccrs.Robinson())
ax.coastlines()
ax.gridlines()
S.plot(ax = ax, transform=ccrs.PlateCarree())
f.shape
xrays[0]['so'].lat.values
#Taking Monthly Ocean data (Omon), for Salinity (so) and Potential Temperature (thetao)
variable_ids = ['so', 'thetao']
experiment_ids = ['historical']
table_ids = ['Omon']
member_ids = ['r1i1p1f1']
# Keep only models (source_ids) that provide every requested experiment,
# variable, table and ensemble member.
source_ids = []
for name, group in df.groupby('source_id'):
    provides_all = (
        set(experiment_ids).issubset(group.experiment_id.values)
        and set(variable_ids).issubset(group.variable_id.values)
        and set(table_ids).issubset(group.table_id.values)
        and set(member_ids).issubset(group.member_id.values)
    )
    if provides_all:
        source_ids.append(name)
source_ids # Models with both monthly potential T and S
len(df_S.source_id.unique())
len(df_S.source_id.unique())
df_T
df_S